Let’s work with the data on contributions to Candidates and Political Committees in Washington State.
The WA portal for OpenData has this data on this website.
I have already prepare a data set, let’s get it from GitHub:
link='https://github.com/EvansDataScience/data/raw/master/contriWA.RDS'
#getting the data TABLE from the file in the cloud:
contriWA=readRDS(file=url(link))
This is what we have:
str(contriWA,width = 60, strict.width = 'cut')
## 'data.frame': 3096599 obs. of 10 variables:
## $ id : chr "6229365.rcpt" "6229366.rcp"..
## $ contributor_state : chr "WA" "WA" "WA" "WA" ...
## $ contributor_zip : chr "98501" "98501" "98501" "98"..
## $ amount : num 106 110 70 41 100 ...
## $ election_year : int 2019 2019 2019 2019 2019 201..
## $ party : Factor w/ 9 levels "","CONSTITUT"..
## $ cash_or_in_kind : Factor w/ 2 levels "Cash","In ki"..
## $ contributor_location: chr "(47.02872, -122.87765)" "("..
## $ Lat : num 47 47 47 47 47.7 ...
## $ Lon : num -123 -123 -123 -123 -122 ...
The data is per year, so let’s check the years available:
sort(unique(contriWA$election_year))
## [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
## [15] 2023
The data includes a spatial field we can take advantage of, the zip code:
library(magrittr)
sort(unique(contriWA$contributor_zip))%>%head(20)
## [1] "98001" "98002" "98003" "98004" "98005" "98006" "98007" "98008"
## [9] "98009" "98010" "98011" "98012" "98013" "98014" "98015" "98016"
## [17] "98017" "98018" "98019" "98020"
When you have a way to organize you data by a column that represents a geographical unit, you can plot your data on a map.
However, in the current format, each row represents a contribution; then, we need a data frame where each row is a ZIP code, and the other columns tell us, for example, some aggregate / summary value per zip code. For example, this is the total contributed per zip code
WA_zip_contri=aggregate(data=contriWA,amount~contributor_zip, sum)
#see result
str(WA_zip_contri)
## 'data.frame': 1111 obs. of 2 variables:
## $ contributor_zip: chr "98001" "98002" "98003" "98004" ...
## $ amount : num 2144939 1756135 7315662 30097329 12897628 ...
The zip code should not be number, so it is good we keep it as text.
Let’s see the values of amount:
summary(WA_zip_contri$amount)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 778 10383 664156 264162 36709970
Let’s turn the values into a thousand unit:
WA_zip_contri$amount=WA_zip_contri$amount/1000
This data frame has the sum of contributions for every zip code since the election year 2009, including the elections up to 2023.
You need a map where each element (shape) is a zip code, in this case for Washington State. I have shared that in Canvas (which I got from here. Let me guide you how to produce the map in the format needed:
When prompted, go to the folder with the shape file and select all of them:
Then click import:
This is my link (please change it to yours):
mapLink="https://github.com/EvansDataScience/data/raw/master/WAzips.json"
With the help of geojsonio, we can get the map:
library(geojsonio)
##
## Attaching package: 'geojsonio'
## The following object is masked from 'package:base':
##
## pretty
PROJmap="+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
wazipMap=topojson_read(mapLink,crs=PROJmap,stringsAsFactors = FALSE)
## Reading layer `SAEP_ZIP_Code_Tabulation_Areas' from data source `https://github.com/EvansDataScience/data/raw/master/WAzips.json' using driver `GeoJSON'
## Simple feature collection with 598 features and 102 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7428 ymin: 45.54354 xmax: -116.9157 ymax: 49.0025
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
Check size of map from cloud:
object.size(wazipMap)
## 7954872 bytes
Solving issues that are generally present in map files:
library(lwgeom)
## Linking to liblwgeom 2.5.0dev r16016, GEOS 3.6.1, PROJ 4.9.3
wazipMap=st_make_valid(wazipMap)
## Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3
Changes in object size:
object.size(wazipMap)
## 7958248 bytes
Simplifying shapes:
library(rmapshaper)
## Registered S3 method overwritten by 'geojsonlint':
## method from
## print.location geojsonio
wazipMap=ms_simplify(wazipMap)
Changes in object size:
object.size(wazipMap)
## 1358760 bytes
We have a map:
library(ggplot2)
## Registered S3 method overwritten by 'dplyr':
## method from
## print.location geojsonlint
ggplot(data=wazipMap) + geom_sf()
Notice you can zoom in this way:
# north is positive / south is negative
# east is positive / west is negative
ggplot(data=wazipMap) +
geom_sf() +
coord_sf(xlim = c(-121, -123),
ylim = c(47, 48.5))
You could use this next to a another big map to focus in a particular area.
Let’s see the columns available:
names(wazipMap)
## [1] "id" "OBJECTID" "ZCTA5CE10" "GEOID10" "MTFCC10"
## [6] "FUNCSTAT10" "PARTFLG10" "INTPTLON10" "INTPTLAT10" "Version"
## [11] "POP2000" "POP2001" "POP2002" "POP2003" "POP2004"
## [16] "POP2005" "POP2006" "POP2007" "POP2008" "POP2009"
## [21] "POP2010" "POP2011" "POP2012" "POP2013" "POP2014"
## [26] "POP2015" "POP2016" "POP2017" "HHP2000" "HHP2001"
## [31] "HHP2002" "HHP2003" "HHP2004" "HHP2005" "HHP2006"
## [36] "HHP2007" "HHP2008" "HHP2009" "HHP2010" "HHP2011"
## [41] "HHP2012" "HHP2013" "HHP2014" "HHP2015" "HHP2016"
## [46] "HHP2017" "GQ2000" "GQ2001" "GQ2002" "GQ2003"
## [51] "GQ2004" "GQ2005" "GQ2006" "GQ2007" "GQ2008"
## [56] "GQ2009" "GQ2010" "GQ2011" "GQ2012" "GQ2013"
## [61] "GQ2014" "GQ2015" "GQ2016" "GQ2017" "HU2000"
## [66] "HU2001" "HU2002" "HU2003" "HU2004" "HU2005"
## [71] "HU2006" "HU2007" "HU2008" "HU2009" "HU2010"
## [76] "HU2011" "HU2012" "HU2013" "HU2014" "HU2015"
## [81] "HU2016" "HU2017" "OHU2000" "OHU2001" "OHU2002"
## [86] "OHU2003" "OHU2004" "OHU2005" "OHU2006" "OHU2007"
## [91] "OHU2008" "OHU2009" "OHU2010" "OHU2011" "OHU2012"
## [96] "OHU2013" "OHU2014" "OHU2015" "OHU2016" "OHU2017"
## [101] "Shape__Are" "Shape__Len" "geometry"
The column with the zip code has the name ZCTA5CE10, let’s check its data type:
str(wazipMap$ZCTA5CE10)
## chr [1:573] "98001" "98002" "98003" "98004" "98005" "98006" "98007" ...
It is also text, the same as the contributions data frame.
The, having common columns, we can merge. Keep in mind that as the zip codes in each are under different column names, I tell the merge function what columns to use:
# put map first:
layerContrib=merge(wazipMap,WA_zip_contri,
by.x='ZCTA5CE10',
by.y='contributor_zip',
all.x=F) # if no coincidence don't keep shape.
There is a new map: layerContrib. However, this merged map may lack some zip codes, so we can prepare a basemap:
# This will make just a border of the state
baseMap <- ms_dissolve(wazipMap)
This is the base map, which may help us show the missing shapes.
ggplot(baseMap) + geom_sf()
We will plot the the amounts contributed, which will be organised into quintiles. Let’s follow these steps:
library(RColorBrewer)
library(tmap)
numberOfClasses = 5
colorForScale='YlGnBu' # color scale
layerContrib$cut=cut_number(layerContrib$amount,numberOfClasses,
ordered_result=T,
dig.lab=5)
5.1 Plot basic version
baseLayer=ggplot(data = baseMap) +geom_sf()
layer1 = baseLayer + geom_sf(data = layerContrib, aes(fill=cut),color=NA,show.legend = T) +
scale_fill_brewer(palette = colorForScale,
name = "Intervals by 1,000")
layer1
5.2 Add elements
library(ggspatial)
creditsText="EPSG: 4326\nProj=longlat\ndatum=WGS84"
layer1s = layer1 + annotation_scale(location = "bl",
width_hint = 0.2,
plot_unit = 'mi',
unit_category='imperial',
style='ticks'
)
layer1ns = layer1s + annotation_north_arrow(location = "tl",
style = north_arrow_minimal,
height = unit(0.3, "in"))
layer1ns
The dataframe contriWA has columns with coordinates, which represent a point in a map:
contriWA[,c(9:10)]%>% head()
## Lat Lon
## 1 47.02872 -122.8777
## 2 47.01362 -122.8755
## 3 47.01362 -122.8755
## 4 47.01362 -122.8755
## 5 47.67672 -122.3517
## 6 47.66897 -122.4044
Let’s use those columns to create a spatial point data frame, while making sure it has the same coordinate system as our map:
contriWA_geo= st_as_sf(contriWA,
coords = c("Lon", "Lat"), #Coord colnames in that order
crs = sp::CRS(PROJmap))# assign a CRS of map
Verifying projection:
library(tmaptools)
get_projection(contriWA_geo)
## [1] "+proj=longlat +datum=WGS84 +no_defs"
Our new spatial points dataframe looks the same:
names(contriWA_geo)
## [1] "id" "contributor_state" "contributor_zip"
## [4] "amount" "election_year" "party"
## [7] "cash_or_in_kind" "contributor_location" "geometry"
But it is not a simple data frame:
class(contriWA_geo)
## [1] "sf" "data.frame"
Now, plot the new map with the data fro 2010 (select the right character for the point) on top of our WA state map:
contriWA_geo2010=contriWA_geo[contriWA_geo$election_year==2010,]
pointsMap= baseLayer + geom_sf(data = contriWA_geo2010,
size = 1,shape=20,
alpha=0.2,color = 'red')
pointsMap
You can change the shape of dots selecting different numbers:
Imagine you need the polygons at the bottom and top deciles:
quantile(layerContrib$amount, c(.1,.9))
## 10% 90%
## 3.44705 1906.42318
Sub maps:
#filters:
top10=quantile(layerContrib$amount, c(.9))
bot10=quantile(layerContrib$amount, c(.1))
#newMaps!
mapBot=layerContrib[layerContrib$amount<=bot10,]
mapTop=layerContrib[layerContrib$amount>=top10,]
legendText="Areas to watch"
shrinkLegend=0.4
title="Top and Botton Average Contribution to elections in WA (2009-2023)"
Plotting the map:
# one group
topLayer= baseLayer + geom_sf(data=mapTop,fill='red',color=NA)
# other group
botTop = topLayer+ geom_sf(data=mapBot,fill='green',color=NA)
# altogether:
fullMap = botTop + annotate(x=-118.5,
y=45.7,
geom = 'text',
label=creditsText,
size=2)
fullMap
If you need to plot more than one map:
library(ggpubr)
layer1ns2=layer1ns +
theme(legend.background = element_rect(size=0.3,
linetype="solid",
colour ="grey"),
legend.position = 'top',
legend.text = element_text(size = 5),
legend.title = element_text(size = 7)) +
guides(fill=guide_legend(title.position = "top",title.hjust = 0.5))
ggarrange(layer1ns2,fullMap,ncol = 1)